#: Used To Comment (Single Line)
# "" : Used To Comment (Multi Line)
# Question: What is the use of commenting?


# RStudio

" RStudio "

" RStudio Works on R
  RStudio is user friendly
  RStudio has 4 Panes "

# RStudio Works on R
# RStudio is user friendly
# RStudio has 4 Panes



# Variable Creation
# Syntax:

# <variable name> = <value / expression>

" Variable name should start from Letters,
 and continue with letters, number, _ , . "

# Example:
# If X ~ U(a = 0, b = 0.5)

a = 0.0
b = 0.5

alpha = 0.05

a
print(a)

# Question: Why do we need variables?

# x  vs  X  (two different variable names)
# R is Case Sensitive
# Meaning: Small and Cap letters have different meaning

# Example:
# If X ~ Bin(n = 10, p = 0.5)

n = 10
p = 0.5

print(p)



# The print() is used to output the value(s) 
# inside a variable

# A function is a keyword associated
# with ()


# R Overwrites variable
# Meaning:
" If a variable with a particular name
  exists and a new variable is created with same name,
  then the previous information will be replaced
  by the new "

# Example:
# If X ~ exp(lambda)

lambda = 0.15
lambda = 0.40

# If Y ~ Gamma(alpha, lambda)

alpha = 10
lambda = 0.39

Lambda = 0.55


# Data Types

# 1.) Numeric

# Example of Numeric variable
# If X ~ exp(lambda)

lambda = 0.15
marks_bor = 87

class(lambda)
class(marks_bor)


# 2.) String / Character

# We always specify a string / character
# value in " / '

# Example:
continuous_dist = "Normal"
'name = "Tanish"'


class("lambda")


# 3.) Boolean / Logical

# Can take only 2 values, True & False
# Keyword for True: TRUE (shorthand: T)
TRUE
T


# Keyword for False: FALSE (shorthand: F)
FALSE
F

# In R, F = 0 & T = 1
# F -> 0 & T -> 1

TRUE + TRUE
1 + 1

T / T
1 / 1

F / F
0 / 0


# Example:
is_H0_true = TRUE
is_H0_true = T
is_H0_true = F
is_H0_true = FALSE

class(is_H0_true)


# Numeric Operators

# Addition (+)

# Example:
" A student scored 87 and 67 in his
  Actuarial paper for Part A & B respectively
  Calculate his total scores. "

A = 87
B = 67

Total = A + B

print(Total)
print(A + B)
print(87 + 67)


# Subtraction (-)

# Example:
" An Actuarial student scored 67 marks in his paper
 The Pass Mark was 59 for the same
 Calculate the margin by which the student has cleared "

marks_scored = 67
pass_mark = 59

# Derive the margin from the variables so it stays correct
# when either of the marks is changed
margin = marks_scored - pass_mark

print(margin)
print(marks_scored - pass_mark)
print(67 - 59)


# Multiplication (*)
# If X ~ Bin(n = 10,p = 0.35)
# Calculate the Variance of X

n = 10
p = 0.35

# Var(X) = n * p * (1 - p) for X ~ Bin(n, p)
variance = n * p * (1 - p)

# E(X) = n * p
# Stored as mean_X (not `mean`) so that the built-in mean() name is not masked
mean_X = n * p

# Division (/)
# If X ~ U(a = 2,b = 5)
# Find F(X = 3.5)

x = 3.5
a = 2
b = 5

# Without brackets R follows operator precedence (BODMAS):
# a / b is evaluated first, so this is NOT the required CDF value
F_3.5 = x - a / b - a


# What R actually computed above
3.5 - (2/5) - 2
3.5 - 0.4 - 2
print(F_3.5)


# Types of brackets (kept as comments: typed on their own,
# () and [] are syntax errors)
# ()
# []
# {}


# NOTE(review): False() is not an R function and stops with
# 'could not find function'; presumably shown to stress that
# keywords are case sensitive (FALSE is valid) - confirm
# False()


# Correct: brackets force both subtractions to happen before the division
F_3.5 = (x - a) / (b - a)
print(F_3.5)

# NOTE: R uses BODMAS rule


# Power (^ / **)
# If X ~ U(a = 1.5,b = 5.8)
# Find the variance of X

a = 1.5
b = 5.8

variance = ((b - a)^2) / 12
print(variance)

variance = ((b - a)**2) / 12
print(variance)


# MODULO (%%)
# Provides the remainder after performing division

# Example:
" An Actuary is trying to model the Claims
  distribution

  She has 11 possible distributions
  and she's testing 3 distributions at one go

  How many distributions will remain at the end? "

dist_left = 11 %% 3

print(dist_left)


" Quiz "

# Identify the type of variables below

distribution = "Weibull"     
no_of_parameters = "2.034"       
alpha = 2                    
lamda = T


# Comment whether the below variables
# name(s) are valid or not
# Candidate names are kept as strings, because several of them
# cannot even be parsed by R when typed as bare names
candidate_names = c("_x", "x_", "x._", "_.x", "1x", "x1",
                    "x11", "x.1_", "x_.", "Var_2", "Var@2")

# Check
# make.names() returns a name unchanged only when it is syntactically valid
is_valid_name = make.names(candidate_names) == candidate_names
names(is_valid_name) = candidate_names
print(is_valid_name)

# Valid names can be assigned directly
x_ = 10
x._ = 10
x1 = 10
x11 = 10
x.1_ = 10
x_. = 10

# Invalid names (starting with _ or a digit) give a syntax error
# _x = 10
# _.x = 10
# 1x = 10


" Q1.) Calculate AND print the squared deviations of the 
      following obs: 23, 32, 26 "


x = c(23,32,26)

(x - mean(x))^2


# Step 1: 
# Step 2:
# Step 3:
# Step 4:


# Mean of the three observations
x_bar = mean(c(23, 32, 26))

# Squared deviation of each observation from the mean
(23 - x_bar) ** 2
(32 - x_bar) ** 2
(26 - x_bar) ** 2



" Q2.) An Actuary has 984 chocolates which he wants to distributes 
       among his 456 friends on his Birthday
       He wonders, how many minimum chocolates will be left if
       all his friends are given equal no. of chocolates "

# Chocolates left over = remainder after an equal split among all 456 friends
chocolates_left = 984 %% 456
print(chocolates_left)



" Q3.) Calculate the PDF/PMF value for x = 0.55
       a.) X ~ N(mu = 5, sigma = 12)
       b.) X ~ exp(lambda = 0.85)
       
       For x = 4,
       c.) X ~ Poi(lambda = 3.4) "

mu = 5 ; sigma = 12
x = 0.55

# a.) Normal PDF from first principles:
# f(x) = 1 / sqrt(2 * pi * sigma^2) * exp(-0.5 * ((x - mu) / sigma)^2)
FIRST = 1 / sqrt(2 * pi * sigma^2)
SECOND = (-0.5) * ((x - mu) / sigma)^2

pdf_manual = FIRST * exp(SECOND)
print(pdf_manual)


# Cross-check with the built-in density function.
# log = FALSE (the default) returns the PDF itself; log = TRUE would
# return log(PDF), which does not match the manual calculation above
pdf_builtin = dnorm(x = 0.55,
                    mean = 5,
                    sd = 12,
                    log = FALSE)
print(pdf_builtin)

# Hint:
# We use exp() to compute the exponential values
# The keyword "pi" = Mathematical Pi
# To calculate factorial, use factorial()

print(exp(1))
print(pi)


# Data Structures

# 1.) Vector / Array

" An Array consists of elements
  having homogeneous data type "

# Syntax:
# <var name> = c(<data sep with ,>)

no_of_papers = c(2,5,3,4,4)

name_of_students = c("Ayush",
                     "Priyanshi",
                     "Raj",
                     "Samar",
                     "Hetvi")

homogenity_check = c(12.55,
                     6.81,
                     "Normal",
                     "Chi-Sq")

class(homogenity_check)

" str(<name>) helps to identify
  the type of Data Structure "


# 2.) Matrix

" Matrix has similar properties like
  vector, but it is a 2-D Array "

# Syntax:
# <var name> = matrix(<data>,
#                      nrow = ,
#                      ncol = ,
#                      byrow = )

matrix(c(0.5,1,4,2),
       ncol = 2)



# Example:
" Assume that we want to simulate data
  from Gamma dist. based on few input
  parameters of alpha, lambda
  i.e X ~ Ga(alpha, lambda) "

# Store all required values in a matrix
# which can then be fed to the algo

# One row per parameter set c(alpha, lambda), so the data must be
# filled row-wise (the default is column-wise, which mixes the pairs up)
para = matrix(nrow = 3,
              ncol = 2,
              byrow = TRUE,
              data = c(6,0.55,
                       2.5,0.13,
                       4,0.75))

print(para)


# Same matrix using positional arguments:
# matrix(<data>, <nrow>, <ncol>, <byrow>)
# All 6 values are supplied so that nothing gets recycled
para = matrix(c(6,0.55,
                2.5,0.13,
                4,0.75),
              3,2,TRUE)

print(para)


colnames(para) = c("Alpha",
                   "Lambda")

print(para)

# colnames(<name>) helps to add custom
# column names to Matrix / DataFrame

# Similarly,
# rownames(<name>) helps to add custom
# row names to Matrix / DataFrame


# 3.) Data Frame

" A DataFrame can be thought of,
 as a collection of Array "

" DataFrame has the power to hold
 various columns of different data types "

# Syntax:
# <var name> = data.frame(<vector 1>,
#                         <vector 2>,
#                         <vector n>)

# Example:
" Store the name of students and the
  no. of papers cleared (from last example)
  in a dataframe "

paper_info = data.frame(name_of_students,
                        no_of_papers)

paper_info

colnames(paper_info) = c("Name",
                         "Paper_Count")


rownames(paper_info) = c("A1",
                         "A2",
                         "A3",
                         "A4",
                         "A5")
print(paper_info)

str(paper_info)


# 4.) List

" A List is a Structure,
  which can hold all DATA STRUCTURE
  at once "

" Which means that a LIST can hold a
  list, vector, matrix, dataframe
  all at once "

# Syntax:
# <var name> = list(<data 1>,
#                   <data 2>,
#                   <data n>)

list_trial = list(no_of_papers,
                  name_of_students,
                  para,
                  paper_info)

print(list_trial)


" Quiz "

" 1.) Can we have a list within a list? "

" 2.) Which data strcuture has rows & columns? "

" 3.) Which data structure is a 1-D Array? "

" 4.) Can a vector hold 2 data types together?
      Explain the consequences of the same. "

" 5.) Which structure can hold Multiple data
      types together. Give proper reasoning. "


" Excercise "

" 1.) Create the following Probability matrix,
      
            Sunny   Rainy
   Sunny     0.5     0.5
   Rainy     0.3     0.7   "


" 2.) Store the above matrix in a list "

" 3.) Create a DataFrame as shown below, 
     
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
           5.1         3.5          1.4         0.2  setosa
           4.9         3.0          1.4         0.2  versi
           4.7         3.2          1.3         0.2  setosa
           4.6         3.1          1.5         0.2  versi  "


" 4.) Add the above DataFrame in list created in 2.)
      Your list should contain 2 data structures
      now "


" Indexing "

" Each element in an array or a 
  DataFrame has its location, which
  can be specified in terms of numeric location "

" In R, Indexing starts from 1 "

" 1-D Array "
" Below are the given distr taken into
  consideration to model Claims "

distr = c("Normal","t-dt","F","Chi-Sq")
#           1        2     3     4


" Give the name of the distribution
 which will be used to model for the 4th & 10th time "

# A vector is 1-D, so it takes a single index
# (distr[1,4] stops with 'incorrect number of dimensions')
distr[4]

# Only 4 distributions exist, so distr[10] itself would be NA.
# NOTE(review): assumes the distributions are reused in the same order,
# so the position is wrapped around - confirm the intended answer
distr[(10 - 1) %% length(distr) + 1]


" Matrix "
" Below is the Transition Probability Matrix "

# Build the 2 x 2 transition matrix one row at a time
TPM = rbind(c(0.2, 0.8),
            c(0.6, 0.4))

# Single element: row 2, column 1
TPM[2, 1]
# Entire 2nd column
TPM[, 2]
# Entire 2nd row
TPM[2, ]


" DataFrame "
" Extract the first 2 columns & first 3 rows of the
  iris data "

data = iris

data[1:3,1:2]
data[c(1,2,3),c(1,2)]


" List "
" From the list in the above example,
  answer the following,
 
  1.) Probability of Sunny - Sunny in 1 step from TPM
  2.) 3rd row of the dataframe "


list_trial[[4]][3,]

list_trial[3]
list_trial[[3]]


list_trial[[4]][3,]



" Logical Operators "

" A condition is being checked giving
  a boolean output "

# < / <= (less than / less than equal to)
# > / >= (greater than / greater than equal to)
# == (equal to)
# != (not equal to)


no_of_papers = c(2,3,4,3,1)

 ! no_of_papers >= 3


# & (AND)
# | (OR)
# ! (NOT)


" An Actuary is trying to Model the distribution 
  of Claim amounts.
  He assumes the distribution to be Normal

  He used 2 methods to estimate the parameters
  1.) Method of Moments
  2.) Maximum Likelihood Estimation

  He wants to know how both these estimates actually
  differs from each other "


mu_mom = runif(1,25000,30000)
mu_mle = runif(1,25000,30000)

sigma_mom = runif(1,2500,3000)
sigma_mle = runif(1,2500,3000)

" < / <= (less than / less than equal to) "
mu_mom < mu_mle
sigma_mom <= sigma_mle

" > / >= (greater than / greater than equal to) "
mu_mom > mu_mle
sigma_mom >= sigma_mle


" == (equal to) "
mu_mom == mu_mle
sigma_mom == sigma_mle


" != (not equal to) "
mu_mom != mu_mle
sigma_mom != sigma_mle


" The Actuary has given 3 papers in his previous attempts
  for which the results are out just now

  He has downloaded the results and will decide to party if, "

result_1 = sample(c("P","F"),1)
result_2 = sample(c("P","F"),1)
result_3 = sample(c("P","F"),1)


# Use of AND operator (&)

" Evaluates 2 or more logical value
  and summarising them into 1 boolean value "

# No parties if he don't pass in any 1 of them

(result_1 == "P") & (result_2 == "P") & (result_3 == "P")


# Use of OR operator (|)

" Evaluates 2 or more logical value
  and summarising them into 1 boolean value "

# He will party if he pass in any one of them 

(result_1 == "P") | (result_2 == "P") | (result_3 == "P")


# Use of NOT operator (!)
" NOT take an value opposite of the true Boolean value
  Eg, NOT will make True equal to False and vice versa "

" He will party irrespective of the results
  The above question is covered in Quiz at 
  the end of the script "

!(result_1 == "P" & result_1 == "F")


" Excercise "

" Q1.) An Actuary has 984 chocolates which he
       wants to distribute among his 456 
       friends on his Birthday. He wonders, 
       how many minimum chocolates will be left if
       all his friends are given equal no. of chocolates "


" Q2.) Later, he found that he's bag can only accomodate 
       (((59**3 - 40) %/% 66) %/% 110) chocolates 
       Comment on the same "



" If - Else "

" If - Else functionality helps us to alter / modify / 
  generate
  output based on some conditions that are to be satisfied

  For eg.
  How does a software decides to print 'P' or 'F' in
  the exam result letter of an Actuarial Exam? "


# Syntax for If - Else:
# if(<condition>){
#    <block of codes>
#    }else{
#    <block of codes>
#     }


# Syntax for If - Else If:
# if(<condition-1>){
#    <block of codes>
#    }else if(<condition-2>){
#    <block of codes>
#    }
#    .
#    .
#    .
# else{
#    <block of codes>
#     }


# Eg.
" The Actuarial exam committee decides that the pass mark
  for CM1 will be 62.
  Write down the grade which will printed in the
  exam result letter "

pass_mark = 62
marks_scored = 61


# Simple Pass / Fail decision
if (marks_scored >= pass_mark) {
  print("Result: Pass")
} else {
  print("Result: Fail")
}


# Grade bands:
# [75,100] : Pass-A
# [pass_mark,74] : Pass

# Reaching the else-if already means marks_scored < 75,
# so only the lower bound has to be checked there
if (marks_scored >= 75) {
  grade = "Pass - A"
} else if (marks_scored >= pass_mark) {
  grade = "Pass"
} else {
  grade = "Fail"
}
print(grade)



# Eg.
" Now the committee decides if any one is on the border line
  their paper needs to be rechecked
  Tag their Grade Status appropriately now... 

  Borderline meaning, in the range of -1, + 1
  of the Pass mark. "

# Range: 59, 60, 61
pass_mark = 60
marks_scored = 62

# Anything strictly above pass_mark + 1 is a clear pass.
# Comparing with > (instead of >= pass_mark + 2) also handles
# non-integer marks such as 61.5 correctly
if (marks_scored > pass_mark + 1) {
  grade_status = "Pass"
} else if (marks_scored >= pass_mark - 1) {
  # Within [pass_mark - 1, pass_mark + 1]: borderline
  grade_status = "Re-Check"
} else {
  grade_status = "Fail"
}
print(grade_status)



" Below is the salary amount (in lakhs).
  Please allot appropriate band and comment. 

  Salary = 17.8 

  Salary <= 2.5  : Band 1
  Salary (2.5 - 5] : Band 2
  Salary (5 - 10] : Band 3
  Salary > 10 : Band 4 "


salary = 1

# Conditions are checked top to bottom and the first TRUE one wins.
# Checking salary <= 10 first would tag every salary up to 10 as B3,
# so start from the highest band and work downwards
if (salary > 10) {
  band = "B4"
} else if (salary > 5) {
  band = "B3"
} else if (salary > 2.5) {
  band = "B2"
} else {
  band = "B1"
}
print(band)


salary = 15

# Explicit range for every band, including salary > 10,
# so that every salary gets a band
# && is used because each condition is a single TRUE / FALSE value
if (salary <= 2.5) {
  band = "B1"
} else if (salary > 2.5 && salary <= 5) {
  band = "B2"
} else if (salary > 5 && salary <= 10) {
  band = "B3"
} else if (salary > 10) {
  band = "B4"
}
print(band)


if(salary <= 2.5){
  print("B1")
}else if(salary <= 5){
  print("B2")
}else if(salary <= 10){
  print("B3")
}else{
  print("B4")
}



if(salary <= 2.5){
  print("B-1")
}
if(salary <= 5){
  print("B-2")
}
if(salary <= 10){
  print("B-3")
}


x = 2

if(x > 3){
  print("True")
}



" Nested If - Else "

# Eg.
" The Actuarial Institute now decides to have
  a top 1% tag on the result letter
  Incorporate the same accordingly. "

marks_scored = 96
pass_mark = 60
top1p = 76


if(marks_scored >= pass_mark){
  
  if(marks_scored >= top1p){
    print("Pass-1%")
  }else{
    print("Pass")
  }
  
}else{
  print("Fail")
}



if(marks_scored >= pass_mark){
  
  if(marks_scored >= top1p){
    print("Pass 1%")
  }else{
    print("Pass")
  }
  
}else{
  
  if(marks_scored <= 30){
    print("F : D")
  }else{
    print("F")
  }
}


" Alternative "


" Loops "

" Loops are used when we want to perform some
  specified activity / operations a no. of times
  which may or may not be known "


" For Loop "

" To be used when the no. of iterations OR
  no. of times to be executed is known "

# Syntax:
# for(iterator){
#    <block of codes>
#  }

x = c("Normal","Chi-Sq","t-dt")

# seq_along(x) gives 1, 2, ..., length(x) and is safe for an empty
# vector, where 1:length(x) would wrongly loop over c(1, 0)
for (i in seq_along(x)) {
  print(x[i])
}


ncol(iris)
nrow(iris)


# Eg.
# Calculate the squared deviations for the
# following data and store all info
# in a variable named SQ.DN,
# Data = 1,3,5,...,99


x = seq(1,99,2)
x_bar = mean(x)               # computed once instead of in every iteration
SQ.DN = rep(0,length(x))      # pre-allocate the result vector


for (i in seq_along(x)) {

  SQ.DN[i] = (x[i] - x_bar)^2

}

# NOTE: the same result without a loop is (x - mean(x))^2



" While Loop "

" To be used when the no. of iterations OR
  no. of times to be executed is unknown "

# Syntax:
# while(condition){
#    <block of codes>
#    <dummy>
#  }

# Eg.
" Print Random no. in the range [-100,10000],
  until and unless a no. less than -50 is generated "

# Start above the threshold so that at least one number is drawn
no = 9999999

# Keep drawing and printing; leave the loop once the latest draw is below -50
repeat {

  if (no < -50) break

  no = sample(seq(-100,10000,1),1)
  print(no)

}


" Nested Loops "

" One or more loop(s) within another loop
  is said to be Nested Loop(s) "


# Syntax:
# for(iterator){
#    for(iterator){
#       <block of codes>
#   }
# }


# Eg.
" You are working with a 2X3 matrix.
  Print all indexes of the matrix "



" Exercise "

" Q1.)
  The Actuarial Society decides to declare only the grades,
  instead of marks
  And the students who have the grade 'F' will be given another
  grade based on how close the score was to the passing mark

  F-Grade sheet:
  5 marks less = A
  10 marks less = B
  More than 10 marks less = C "


" Q2.) 
  Calculate the SD(sigma) for the following data using
  For Loop & While Loop,
  Data = 2,5,8,11,...,98 "



" Functions In R "

" length() "
" Provides the No. of elements available "

" For Eg.
  Provide the no. of elements in the below vector "

count = c(13,24,54,56,NA,12,NA,23,"Rohit")
length(count)


" sum() "
" To add a list of no. "

marks_scored = c(78,89,76,91,40)
sum(marks_scored)


marks_scored = c(78,89,76,NA,91,40)
sum(marks_scored)

sum(marks_scored,na.rm = T)


" We use na.rm = T to compute the required
  results ignoring ALL THE NAs "

marks_scored = c(78,89,76,NA,91,40)

# length() also counts the NA, so divide by the number of
# non-missing marks to agree with mean(..., na.rm = TRUE)
manual_mean = sum(marks_scored,na.rm = TRUE) / sum(!is.na(marks_scored))
manual_mean
mean(marks_scored,na.rm = TRUE)




" prod() "
" To multiply a list of no. "


prod(seq(1,5,0.5))



c(1,2,3,4,5) >= c(3,1)


" NOTE: "
" Addition / Multiplication of 2 vectors of
  unequal length "

marks_R = c(34,56,76,67,71)
marks_SAS = c(54,34,11)

marks_R + marks_SAS

c(1,2,3,4) + c(10,2)



price = c(12,43,12,33,44)
quantity = c(12,44)

price * quantity


" which() "
" Returns the Index values, where some specified
  condition(s) is/are TRUE "

" For Eg. 
  Find whether or not Pareto Dist is present in 
  the below vector, and if yes, it is
  present at which Index "

dist = c("Normal","Chi-Sq","t-dt","F-dt","Pareto",
         "FGM","Pareto")

which(dist == "Pareto" | dist == "Normal")

no = c(12,23,45,67,32)
which(no <= 50)


" Matrix Multiplication (%*%) "

Probability_Matrix = matrix(c(0.15,0.85,
                              0.4,0.6),
                            ncol = 2,
                            byrow = T)


Probability_Matrix * Probability_Matrix
Probability_Matrix %*% Probability_Matrix


" The above code provides simple
  Multiplication and not 
  Matrix Multiplication "


Probability_Matrix %*% Probability_Matrix



" seq(from, to, by) "

" To create a sequence of no. 
  by providing the start, end and the 
  in between spacing of the no. "

" For eg. Create a sequence of no.
  from 1.5 to 10.5 at a step of 0.5 "

seq(from = 1.5,to = 10.8,by = 0.5)
seq(1.5,10.5,0.5)

?sample()



" remove() "

" To remove a particular variable from 
  R's environment "

remove(x)

" In order to remove all variables 
  from environment use, "

rm(list = ls())

?ls()


" paste() "

" Concatenate a no. of Strings or Numbers "

" For Eg.
  Print the total marks scored by the student"


total = 87
paste("The total marks scored is",total,sep = " = ")


paste("Good"," ","Morning",",","Class",sep = "")


name = "Tanish"
surname = "Telisara"

paste(name,surname,sep = ".")

" The sep = , is used to provide the element
  from which we want to separate the statements "



" as.numeric() "

" Used to convert a vector into Numeric data type "

no. = c("10","34","31","54")
class(no.)

no_after_conv = as.numeric(no.)
class(no_after_conv)


dist = c("34","Normal","Chi-Sq","t-dt",
         "F-dt","Pareto","FGM")

as.numeric(dist)


" as.character() "

" Used to convert a vector into Character data type "

class(no_after_conv)

no_after_conv_chr = as.character(no_after_conv)
class(no_after_conv_chr)



" is.na() "

" Returns a Boolean value stating whether the
  corresponding element in a vector / DF
  is Missing / NA or not "

Age = c(23,25,32,53,32,NA,55,43,NA,NA)

is.na(Age)

which(!is.na(Age))

sum(!is.na(Age))



x = iris[sample(1:150,size = 10,replace = F) ,]



" head(,n = ) "

" To View the first n rows of the Data 
  By default n = 6 "

head(iris)
head(iris,n = 10)
head(iris,10)

?head()

View(iris[c(12,54,32,67),])


" tail(,n = ) "

" To View the bottom n rows of the Data 
  By default n = 6 "


tail(iris)
tail(iris,n = 10)

tail(iris)

View(head(x))



" colSums() "

" Compute Column Totals for a Matrix / DF "

colSums(head(iris[,1:3],n = 10))


" rowSums() "

" Compute Rows Totals for a Matrix / DF"

rowSums(head(iris[,1:3]))


View(head(iris[,-c(4,5)]))





" colnames() "

" To print the Column names of a DF / Matrix "

# The environment was cleared with rm(list = ls()) earlier in the script,
# so the data frame has to be created again before it is used
data = iris

# Print the current column names
colnames(data)

# colnames() can also be used to replace the column names
colnames(data) = c(1,2,3,4,5)
colnames(data)


" Downloading and Importing Libraries "

" At times, the Base package doesn't provide
  all required functions.
  Hence, we need to use function(s) available
  in other libraries "


# Syntax to download a package
# install.packages("<Package Name>")

# Syntax to load a package
# library(<Package Name>)


# Install dplyr only when it is missing, so that re-running the
# script does not download the package every time
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

library(dplyr)


" Filtering Data "

" To extract a few columns
            OR
  Extract rows based on some condition "

library(dplyr)


" Using subset() function "
" Select only 2 columns: Sepal.Length & Petal.Width "

# Syntax:
# subset(<DF>,
#        <condition(s)>,
#        select = c(<col names>))

# select = , allows to extract the
# columns names specified

filter = subset(iris,
                select = c("Sepal.Length",
                             "Petal.Width"))

x = iris[ , c("Sepal.Length",
            "Petal.Width")]


" Select only those rows: Sepal.Length >= 2.5 "


filter_1 = subset(iris, 
                  Sepal.Length >= 5 &
                    Species == "setosa",
                  select = c("Species",
                             "Petal.Length",
                             "Petal.Width",
                             "Sepal.Width",
                             "Sepal.Length"))

# NOTE:
# We can specify multiple criteria(s)
# just after specifying the Data

# Question:
# Which Species have the highest variance in
# Petal Length?

# Petal.Length variance for each Species.
# The column is extracted with $ every time, so var() always
# receives a plain numeric vector
var(subset(iris,
           Species == "setosa")$Petal.Length)

var(subset(iris,
           Species == "versicolor")$Petal.Length)

var(subset(iris,
           Species == "virginica")$Petal.Length)


# Alternative: logical indexing instead of subset()
X = iris[iris$Species == "setosa" ,
         "Petal.Length"]

var(X)


# All three variances in one go, and the Species with the highest one
petal_var = tapply(iris$Petal.Length, iris$Species, var)
print(petal_var)
names(which.max(petal_var))


" Sorting Columns in Data Frames
  using arrange() in ascending order "

" For Eg. Sort the entire iris data set by
  Sepal.Length "

# Syntax:
# arrange(<DF>,
#         <column name(s) you want to sort with>)

data = arrange(iris,Sepal.Length)
View(data)


" Sorting Columns in Data Frames
  using arrange() in descending order "

data = arrange(iris,desc(Sepal.Length))
View(data)



" Creating a New column in a DataFrame "

" Use of '$' 
  '$' is used to extract columns of a DF "

data[,"Species"]

head(iris$Sepal.Length)

data$Species

" For Eg.
  Add a New column 'Total' which is the 
  addition of all the columns "

# Syntax:
# DF $ <New Column Name> = <expression / Value>

data = iris
data$TEMP = "Exp"

# Total using the column names
data$Total = data$Sepal.Length +
  data$Sepal.Width +
  data$Petal.Length +
  data$Petal.Width

# Same total using the column positions (all 4 numeric columns)
data$Total = data[,1] + data[,2] + data[,3] + data[,4]
             
             
" Alternative "

x = rowSums(iris[,1:4])

data$Count = 1
